library(tidyverse)
library(ggplot2)
library(tidyverse)
library(kableExtra)
library(reshape2)
library(dplyr)
library(network)
library(ggraph)
library(GGally)
library(sna)
library(ggplot2)
library(ggnetwork)
library(ggraph)
library(tidygraph)
library(ggsci)
library(data.table)
library(reshape2)
library(tidymodels)
library(furrr)
library(R.matlab)
library(BiocParallel)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(gtools)
library(graphlayouts)
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/results/inh/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_singlenuclei/2nd_pass/snakemake-sn/input/"
resources_dir = params$resources_dir
# BN folders
data_prefix = "inh_m06"
bn_results_dir = "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/"
BN_run_dir = "/pastel/resources/bayesian_networks/CINDERellA/"
BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
cat("Preparing phenotypes...")
## Preparing phenotypes...
### phenotype
phenotype = readRDS(paste0(resources_dir, "phenotypes/basic_Apr2022.rds"))
phenotype_dt = phenotype$data
metadata = phenotype$data
### Create custom phenotypes and includes resilience
phenotype_dt$ad_dementia_status = ifelse(phenotype_dt$cogdx_3gp<3,0,1)
phenotype_dt$cognitive_impairment_status = ifelse(phenotype_dt$cogdx_3gp<2,0,1)
phenotype_dt$gpath_sqrt = sqrt(phenotype_dt$gpath)
phenotype_dt$amyloid_sqrt = sqrt(phenotype_dt$amyloid)
phenotype_dt$tangles_sqrt = sqrt(phenotype_dt$tangles)
phenotype_dt$nft_sqrt = sqrt(phenotype_dt$nft)
phenotype_dt$plaq_d_sqrt = sqrt(phenotype_dt$plaq_d)
phenotype_dt$plaq_n_sqrt = sqrt(phenotype_dt$plaq_n)
phenotype_dt$tdp_43_binary = phenotype_dt$tdp_st2
phenotype_dt$dxpark_status = phenotype_dt$dxpark-1
resilience = read.csv(paste0(resources_dir, "resilience/resilience_capuano_july2022/R719_CR_ROSMAP.csv"))
resilience$projid2 = sprintf("%08d", resilience$projid) # Add leading zeros
phenotype_dt = phenotype_dt %>% dplyr::left_join(resilience[,-1], by = c("projid"="projid2"))
save(phenotype_dt, file = paste0(bn_results_dir,"phenotypes.RData"))
cat("Saved:", paste0(bn_results_dir,"phenotypes.RData"))
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/phenotypes.RData
Expression data
cat("Loading SE modules...")
## Loading SE modules...
mod2plot = 6 # Module we want to plot
# pval_cutoff = 0.05 # p-value cutoff for the BN
top_n_genes = 100 # Number of top genes to include in the BN
# Expression data for a single set:
exprData = read.table(paste0(expression_dir, "inh.txt"), header = T, stringsAsFactors = F, check.names = F)
expr_matx = as.data.frame(exprData) # Residuals of the expression
gene_modules = read.table(paste0(net_dir, "geneBycluster.txt"), header = T, stringsAsFactors = F) # clusters from SpeakEasy
# Select the expression values for the module of interest
to_plot = gene_modules$ensembl[gene_modules$cluster_lv3 == mod2plot]
expr_matx_mod = expr_matx[to_plot, ]
save(expr_matx_mod, file = paste0(bn_results_dir,"dataExp.RData"))
cat("\nSelecting modules based on association results...")
##
## Selecting modules based on association results...
load(paste0(bn_results_dir,"phenotypes.RData"))
load(paste0(bn_results_dir,"dataExp.RData"))
phenotype_match = phenotype_dt[match(colnames(expr_matx_mod),phenotype_dt$projid),]
expr_matx_mod_t <- as.matrix(t(expr_matx_mod))
#identical(rownames(data4linear_reg),phenotype_match$projid) # TRUE
#Covariates
covariates_ = c( "cogng_demog_slope",
"tangles_sqrt",
"amyloid_sqrt")
adj_by = c("msex","age_death")
data4linear_reg = cbind(expr_matx_mod_t,phenotype_match[,covariates_,drop=F],phenotype_match[,adj_by,drop=F])
scale_data = TRUE
matrix_pvalue = matrix(NA, nrow = length(covariates_), ncol = ncol(expr_matx_mod_t))
for (x in 1:length(covariates_)){
for (y in 1:ncol(expr_matx_mod_t)){
x_cov = covariates_[x]
y_mod = colnames(data4linear_reg)[y]
if(scale_data){
form = as.formula(paste(paste0("scale(",y_mod,")"),"~",paste0("scale(",x_cov,")"),"+",paste(adj_by,collapse = "+")))
}else{
form = as.formula(paste(y_mod,"~",x_cov,"+",paste(adj_by,collapse = "+")))
}
matrix_pvalue[x,y] <- coef(summary( lm(form, data = data4linear_reg) ))[1,"Pr(>|t|)"]
}
}
rownames(matrix_pvalue) = covariates_
colnames(matrix_pvalue) = colnames(expr_matx_mod_t)
matrix_pvalue_adj = matrix(p.adjust(as.vector(as.matrix(matrix_pvalue)), method='bonferroni'),ncol=ncol(matrix_pvalue))
matrix_pvalue_m = reshape2::melt(as.matrix(matrix_pvalue)) %>%
dplyr::group_by(Var2) %>% dplyr::summarise(pval = min(value)) %>% arrange(pval)
top_genes = as.character(matrix_pvalue_m$Var2[1:top_n_genes])
# Save the input expression for the BN
write_csv(as.data.frame(expr_matx_mod)[top_genes,], file = BN_run_data, col_names = F)
cat("\nSaved:", BN_run_data)
##
## Saved: /pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/inh_m06_exp.txt
# CINDERellA has two inputs: exp.txt and output_folder
setwd(BN_run_dir)
cmd_matlab_call = paste0("matlab -nodisplay -nojvm -nosplash -nodesktop -r")
cmd_matlab_param = paste0("runt=5000; data='",BN_run_data,"'; out_dir='",BN_output_dir,"'; run('",BN_run_dir,"CINDERellA.m')")
cmd_matlab_run = paste0(cmd_matlab_call, " \"",cmd_matlab_param,"\"")
cat(cmd_matlab_run)
system(cmd_matlab_run)
## [1] "/pastel/projects/speakeasy_dlpfc/BN/sn_dlpfc_ast_res/inh_modules/inh_m06/inh_m06_res"
Read results
Edges filtered
# BN_output_dir = paste0(bn_results_dir,data_prefix,"_res")
# BN_run_data = paste0(bn_results_dir,data_prefix,"_exp.txt")
# load(file = paste0(bn_results_dir,data_prefix,"_BN_input.RData")) # phenotype_match,gene_net_metadata,mod_eigengen
# load(file = paste0(bn_results_dir,data_prefix,"_dataExp.RData"))
edgefrq = read_tsv(paste0(BN_output_dir,"/edgefrq.txt"), col_names = c("A","B","freq"), show_col_types = F)
dataExp = expr_matx_mod
edges_df = na.omit(cbind(edgefrq, rownames(dataExp)[edgefrq$A], rownames(dataExp)[edgefrq$B]))
colnames(edges_df) = c("fromNode", "toNode", "weight", "fromAltName", "toAltName") # match names for Cytoscape input
edges_df$fromAltName = gsub("(.*)\\.(.*)","\\1",edges_df$fromAltName)
edges_df$toAltName = gsub("(.*)\\.(.*)","\\1",edges_df$toAltName)
edges_filtered = edges_df[abs(edges_df$weight)>0.45, ] # weight default = 0.33
rownames(edges_filtered) = NULL
# createDT(edges_filtered %>% arrange(-weight))
Nodes filtered
nodes_table = data.frame(nodeName = 1:nrow(dataExp), altName = gsub("(.*)\\.(.*)","\\1",rownames(dataExp))) %>% distinct()
nodes_table$altName = gsub("(.*)\\.(.*)","\\1",nodes_table$altName)
rownames(nodes_table) = NULL
nodes_table = na.omit(unique(nodes_table)) %>% left_join(unique(gene_modules[,c("ensembl","gene_name")]), by = c("altName"="ensembl"))
nodes_filtered = nodes_table[nodes_table$altName %in% unique(c(edges_filtered$fromAltName,edges_filtered$toAltName)), ]
nodes_filtered = nodes_filtered[! duplicated(nodes_filtered$altName), ]
createDT(nodes_filtered %>% left_join(reshape2::melt(as.matrix(matrix_pvalue)), by = c("altName"="Var2")) %>%
group_by(altName) %>% mutate(best_pval = min(value),
best_pheno = Var1[which.min(value)],
pvalues = paste0(formatC(value, format = "e", digits = 2), collapse = ";"),
phenotypes = paste0(Var1, collapse = ";")) %>%
select(nodeName, altName, gene_name, best_pval, best_pheno, pvalues, phenotypes) %>%
distinct() %>% arrange(best_pval))
BN plot
# Cairo::CairoPDF(file = "/pastel/Github_scripts/SpeakEasy_dlpfc/figures4paper/v3_may2024/bn_inh06.pdf", width = 16, height = 10)
plot_geneBN(edges_filtered = edges_filtered,
nodes_filtered = nodes_filtered)
## Using `stress` as default layout

Session
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggforce_0.3.3 igraph_2.0.3 graphlayouts_0.8.0
## [4] gtools_3.9.4 RColorBrewer_1.1-3 circlize_0.4.16
## [7] ComplexHeatmap_2.15.4 BiocParallel_1.28.3 R.matlab_3.6.2
## [10] furrr_0.3.1 future_1.33.2 yardstick_1.2.0
## [13] workflowsets_1.0.1 workflows_1.1.3 tune_1.1.2
## [16] rsample_1.2.0 recipes_1.0.8 parsnip_1.1.1
## [19] modeldata_1.2.0 infer_1.0.5 dials_1.2.0
## [22] scales_1.3.0 broom_1.0.5 tidymodels_1.1.1
## [25] data.table_1.15.4 ggsci_3.0.3 tidygraph_1.2.0
## [28] ggnetwork_0.5.13 sna_2.7-2 statnet.common_4.9.0
## [31] GGally_2.1.2 ggraph_2.0.5 network_1.18.2
## [34] reshape2_1.4.4 kableExtra_1.3.4 lubridate_1.9.3
## [37] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [40] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
## [43] tibble_3.2.1 ggplot2_3.5.0 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] shadowtext_0.1.1 backports_1.4.1 systemfonts_1.0.6
## [4] plyr_1.8.9 splines_4.1.2 websocket_1.4.1
## [7] crosstalk_1.2.1 listenv_0.9.1 digest_0.6.35
## [10] foreach_1.5.2 htmltools_0.5.8.1 viridis_0.6.2
## [13] fansi_1.0.6 magrittr_2.0.3 cluster_2.1.2
## [16] doParallel_1.0.17 tzdb_0.4.0 globals_0.16.3
## [19] gower_1.0.1 matrixStats_1.3.0 vroom_1.6.5
## [22] R.utils_2.12.2 svglite_2.1.3 hardhat_1.3.0
## [25] timechange_0.3.0 colorspace_2.1-0 rvest_1.0.4
## [28] ggrepel_0.9.5 xfun_0.43 crayon_1.5.2
## [31] jsonlite_1.8.8 survival_3.2-13 iterators_1.0.14
## [34] glue_1.7.0 polyclip_1.10-6 gtable_0.3.4
## [37] ipred_0.9-14 webshot_0.5.2 GetoptLong_1.0.5
## [40] shape_1.4.6.1 future.apply_1.11.2 BiocGenerics_0.40.0
## [43] Rcpp_1.0.12 viridisLite_0.4.2 clue_0.3-65
## [46] bit_4.0.5 GPfit_1.0-8 DT_0.30
## [49] stats4_4.1.2 lava_1.7.2.1 prodlim_2023.08.28
## [52] htmlwidgets_1.6.4 httr_1.4.7 pkgconfig_2.0.3
## [55] reshape_0.8.9 R.methodsS3_1.8.2 farver_2.1.1
## [58] nnet_7.3-16 sass_0.4.9 chromote_0.2.0
## [61] utf8_1.2.4 labeling_0.4.3 tidyselect_1.2.1
## [64] rlang_1.1.3 DiceDesign_1.9 later_1.3.2
## [67] munsell_0.5.1 tools_4.1.2 cachem_1.0.8
## [70] cli_3.6.2 generics_0.1.3 evaluate_0.23
## [73] fastmap_1.1.1 yaml_2.3.8 bit64_4.0.5
## [76] processx_3.8.4 knitr_1.46 R.oo_1.25.0
## [79] xml2_1.3.6 compiler_4.1.2 rstudioapi_0.16.0
## [82] png_0.1-8 lhs_1.1.6 tweenr_1.0.2
## [85] bslib_0.7.0 stringi_1.8.3 highr_0.10
## [88] ps_1.7.6 lattice_0.20-45 Matrix_1.6-1.1
## [91] vctrs_0.6.5 pillar_1.9.0 lifecycle_1.0.4
## [94] GlobalOptions_0.1.2 jquerylib_0.1.4 R6_2.5.1
## [97] promises_1.3.0 gridExtra_2.3 IRanges_2.28.0
## [100] parallelly_1.37.1 codetools_0.2-18 MASS_7.3-54
## [103] rjson_0.2.21 withr_3.0.0 S4Vectors_0.32.4
## [106] parallel_4.1.2 hms_1.1.3 rpart_4.1-15
## [109] timeDate_4022.108 coda_0.19-4.1 class_7.3-19
## [112] rmarkdown_2.26